############### ###############
## sumstats.R
## Project: CBO
## Author: Kamil Kouhen
## Purpose: Function for easy and basic descriptive statistics
## Date of creation: 01/05/2022
############### ###############

# Purpose: Function for easy and basic descriptive statistics
# This package depends on tidyverse, please keep this in mind before running

# 1. Checking if object fits function requirements --------------- ######

check.object = function(x){
  if (is.data.frame(x) | is_vector(x) | is_tibble(x)) {}
    else stop("Object needs to be a sourdata frame or a vector (e.g. dataframe column).")
}

# 2. Creating sub functions that will be used for sumstats (below) --------------- ######

  # min.k function
  min.k=function(x) {
    check.object(x)
    if (is.numeric(x)) format(round(min(x, na.rm=TRUE), digits = 2), big.mark=",", scientific=FALSE)
    else "."
  }

  # q1.k function
  q1.k=function(x) {
    check.object(x)
    if (is.numeric(x)) format(round(quantile(x, 0.25, na.rm=TRUE), digits = 2), big.mark=",", scientific=FALSE)
    else "."
  }

  # median.k function
  median.k=function(x) {
    check.object(x)
    if (is.numeric(x)) format(round(median(x, na.rm=TRUE), digits = 2), big.mark=",", scientific=FALSE)
    else "."
  }

  # mean.k function
  mean.k=function(x) {
    check.object(x)
    if(length(x[!is.na(x)]) != 0){
      if (is.numeric(x) | ((is.factor(x) & length(unique(x[!is.na(x)])) == 2 & max(nchar(as.character(x[!is.na(x)]))) == 1))){
        if ((min(as.numeric(as.character(x)), na.rm=TRUE)==0 | max(as.numeric(as.character(x)), na.rm=TRUE)==1) | is.numeric(x)){
          format(round(mean(as.numeric(as.character(x)), na.rm=TRUE), digits = 2), big.mark=",", scientific=FALSE)
        }
      }
      else "."
    }
    else "."
  }
  # q3.k function
  q3.k=function(x) {
    check.object(x)
    if (is.numeric(x)) format(round(quantile(x, 0.75, na.rm=TRUE), digits = 2), big.mark=",", scientific=FALSE)
    else "."
  }

  # max.k function
  max.k=function(x) {
    check.object(x)
    if (is.numeric(x)) format(round(max(x, na.rm=TRUE), digits = 2), big.mark=",", scientific=FALSE)
    else "."
  }

  # sd.k function
  sd.k=function(x) {
    check.object(x)
    if (is.numeric(x)) format(round(sd(x, na.rm=TRUE), digits = 2), big.mark=",", scientific=FALSE)
    else "."
  }

  # sharena.k function
  sharena.k=function(x) {
    check.object(x)
    round((sum(length(which(is.na(x))))/length(x)), digits = 4)
  }

  # nbna.k function
  nbna.k=function(x) {
    check.object(x)
    round((sum(length(which(is.na(x))))), digits = 4)
  }

  # shareemptychar.k function: share of values that are empty (factor or character)
  shareemptychar.k=function(x) {
    check.object(x)
    if (is.character(x) | is.factor(x)) round((sum(length(which(x == "")))/length(x)), digits = 4)
    else "."
  }

  # class function
  class.k=function(x) {
    check.object(x)
    if (length(class(x)) == 2) as.character(as.matrix(class(x))[2,]) #for labelled classes
    else class(x)
  }

# 3. Creating main function --------------- ######

sumstats=function(x){  # start function sumstats
  if (!("tidyverse" %in% .packages())){
    stop("tidyverse is needed for this function, please load it before using this function")
    break
  } else{
    check.object(x)
    if(typeof(x) != "list"){ #If only one variable
      sumtable = cbind(deparse(substitute(x)),
                       class.k(x),
                       min.k(x),
                       q1.k(x),
                       median.k(x),
                       mean.k(x), #Works with binary factor variables too
                       q3.k(x),
                       max.k(x),
                       sd.k(x),
                       sharena.k(x),
                       nbna.k(x),
                       shareemptychar.k(x))
      sumtable=as.data.frame(sumtable)
      names(sumtable)=c("variable","class","min","Q1","median","mean","Q3","MAX", "sd", "share.NA", "number.NA", "share.emptychar")
      return(sumtable)
    }
    else{ #if list of variable in dataframe
      if (length(x %>% select_if(is.character) %>% colnames) == length(x %>% colnames)){ ##If only character variables in var list only display class and number of NAs.
        sumtable = cbind(
                         sapply(x,class.k),
                         sapply(x,na.k),
                         sapply(x, shareemptychar.k(x)))
        sumtable=as.data.frame(sumtable)
        names(sumtable)=c("class", "share.NA", "share.emptychar")
        return(sumtable)
      }
      else{
        sumtable = cbind(
                         sapply(x,class.k),
                         sapply(x,min.k),
                         sapply(x,q1.k),
                         sapply(x,median.k),
                         sapply(x,mean.k), #Works with binary factor variables too
                         sapply(x,q3.k),
                         sapply(x,max.k),
                         sapply(x,sd.k),
                         sapply(x,sharena.k),
                         sapply(x,nbna.k),
                         sapply(x,shareemptychar.k))
        sumtable=as.data.frame(sumtable)
        names(sumtable)=c("class","min","Q1","median","mean","Q3","MAX", "sd", "share.NA", "number.NA", "share.emptychar")
        return(sumtable)
      }
    }
  }
}
